* Start from an empty session, move to the project folder and open the log.
clear all
cd  "\\micro.intra\Projekt\P0624$\P0624_Gem\Kitschelt\submission"
* A log left open by an earlier, interrupted run would make -log using- abort;
* -capture- ignores the error raised when no log is open.
capture log close
log using setup_main, replace


***************************************************************
*******************Add data on politicians and*****************
*******************Prepare data for analyses*******************
***************************************************************

* Load the population data. Keep rows whose year is not below 2006 (a missing
* year is not below 2006, so such rows stay) and that have both x and y.
use main_data_pop, clear
keep if year >= 2006
drop if x == . | y == .


* Restrict the data to the variables used in the rest of this file.
keep year p_id party PasNar FInk educ_year age Kon single child ///
	galtan* ConcOther -AdminAct x* PrivSec-NonProfit ///
	OpFtgLedare BusPers education lowinc sector cap_exec* ///
	inc_pct inc_pct_emp Ssyk ssyk96* stdoffshore_pct ///
	politician galtan_pct y* Ssyk4 missing

* sex: +1 when Kon is 2, -1 when Kon is 1, missing for any other value of Kon
gen sex = cond(Kon == 2, 1, cond(Kon == 1, -1, .))
* Reverse code some of the occupational variables (new value = 100 - old value);
* the reversed copy carries a trailing underscore
foreach task of varlist ConMacProc DocRecInfo HandMoveObj InterComp AdminAct {
	gen `task'_ = 100 - `task'
}
* Dummies for PasNar / FInk being different from zero, and for either of them
gen pasnar_d = (PasNar != 0)
gen fink_d = (FInk != 0)
gen busink = !(FInk == 0 & PasNar == 0)
*Calculate z-scores for all underlying variables.
* Every listed variable v gets a standardized copy std_v. The std_ variables are
* created in list order; the variable ranges used below (for example
* std_ConcOther-std_AdminAct_ and std_galtan_pct- std_BusPers) rely on that order.
foreach var of varlist galtan_pct    education  sex ConcOther AssCareOther NegConSolv ServOriCare SocPercep ///
ConMacProc_ DocRecInfo_ HandMoveObj_ InterComp_ AdminAct_ stdoffshore_pct sector cap_exec_alt /// 
PrivSec-NonProfit inc_pct  busink OpFtgLedare BusPers  {
	egen std_`var'=std(`var')
}
*Define alternative index definitions
* NOTE(review): std_galtan_std is created here and dropped at the end of this
* section without being used in between.
egen std_galtan_std =std(galtan_std )
* Flip the sign of the standardized PubSec variable.
* Assumes PubSec lies inside the PrivSec-NonProfit range so that the loop above created std_PubSec -- TODO confirm.
replace std_PubSec=-std_PubSec
* *_std: row mean of the three standardized components
egen y_std= rowmean(std_galtan_pct    std_education  std_sex )
egen x_std= rowmean(std_stdoffshore_pct std_sector std_cap_exec_alt )
* *_eq: equal-weight sum of the unstandardized components; the sex term of y_eq is (Kon-1.5)/3, not the sex variable
gen y_eq = galtan_pct/3 + education/3 + (Kon-1.5)/3
gen x_eq = stdoffshore_pct/3+ sector/3 + cap_exec_alt/3 
* *_istd: row mean over all standardized items in the given ranges
egen y_istd= rowmean(std_ConcOther-std_AdminAct_   std_education  std_sex )
egen x_istd= rowmean(std_stdoffshore_pct std_PrivSec-std_BusPers )
* y_wistd is built from the same three variables as y_std
egen y_wistd= rowmean(std_galtan_pct std_education  std_sex )
* x_wistd: the two groups of items are averaged into x1 and x2, each average is
* standardized again, and the result is averaged with std_stdoffshore_pct
egen x1=rowmean( std_PrivSec-std_NonProfit )
egen x2=rowmean( std_inc_pct-std_BusPers )
egen std_x1 =std(x1 )
egen std_x2 =std(x2 )
egen x_wistd= rowmean(std_stdoffshore_pct std_x1 std_x2 )

*Drop irrelevant variables (all std_ helpers and the temporary x1/x2)
drop  std_galtan_pct- std_BusPers x1 x2 std_x1 std_x2 std_galtan_std
*Construct alternative index based on minimum and maximum values of underlying variables.
* Each listed variable v gets a copy ma_v rescaled so that its sample minimum
* maps to -1 and its sample maximum to 1. The ma_ variables are created in list
* order; the ranges used below rely on that order.
* NOTE(review): the list starts with "galtan"; this must resolve to a single variable -- confirm.
foreach var of varlist galtan ConcOther AssCareOther NegConSolv ServOriCare SocPercep  ConMacProc_ DocRecInfo_ HandMoveObj_ InterComp_ AdminAct_  PrivSec-NonProfit inc_pct  busink OpFtgLedare BusPers  {
	egen max=max(`var')
	egen min=min(`var')
	gen ma_`var'=((`var'-min)/(max-min))*2-1
	drop max min
}
* Flip the sign of the rescaled PubSec variable.
* Assumes PubSec lies inside the PrivSec-NonProfit range -- TODO confirm.
replace ma_PubSec=-ma_PubSec
* Row means of the rescaled items; education, sex and stdoffshore_pct enter these
* indices unrescaled
egen y_ma= rowmean(ma_ConcOther-ma_AdminAct_   education  sex )
egen x_ma= rowmean(stdoffshore_pct ma_PrivSec-ma_BusPers )
egen y_wma= rowmean(ma_galtan education  sex )
* x_wma: the two groups of items are first averaged into x1 and x2
egen x1=rowmean( ma_PrivSec-ma_NonProfit )
egen x2=rowmean( ma_inc_pct-ma_BusPers )
egen x_wma= rowmean(stdoffshore_pct x1 x2 )

* Remove the rescaled helpers and the temporary x1/x2
drop ma_* x1 x2

* Calculate z-scores (std_ prefix) for the main indices x and y and for every
* alternative index stored from y_std through x_wma
foreach idx of varlist x y y_std-x_wma {
	egen std_`idx' = std(`idx')
}
* Merge with data on politicians.
* The merge key valar equals year, except that year 2012 is matched to 2014.
* unmatched(using) also keeps politician records that have no match in the
* data in memory.
gen valar = cond(year == 2012, 2014, year)
joinby p_id valar using "\\micro.intra\Projekt\P0624$\P0624_Gem\Data Extraction and Files\Data files\politicians combined.dta", unmatched(using)

* fy is the smallest valar observed for a person; first_nom flags the rows
* belonging to that first year
bysort p_id: egen fy = min(valar)
gen first_nom = (valar == fy)

*Define party for all levels of government: where parti_initial is empty, fall
*back first on parti_initial_ri and then on parti_initial_lt
replace parti_initial=parti_initial_ri if parti_initial==""
replace parti_initial=parti_initial_lt if parti_initial==""
* Keep only rows with a party, and drop the party coded "L". This filter is
* applied once; no later step in this section reintroduces such rows.
keep if parti_initial!="" & parti_initial!="L"

*Define English party initials; parties without a mapping below keep their
*original initial
gen party_ineng=parti_initial
replace  party_ineng="L" if parti_initial=="V"
replace  party_ineng="Cons" if parti_initial=="M"
replace  party_ineng="Lib" if parti_initial=="F"
replace  party_ineng="Cen" if parti_initial=="C"
replace  party_ineng="Cd" if parti_initial=="K"
replace  party_ineng="Sd" if parti_initial=="B"

*Set the list rank variable to zero for those that are nominated to parliament
*or regional council (nom_ri / nom_lt) but not to municipal council
replace nrinom_hl=0 if (nom_ri==1 | nom_lt==1) & nrinom_hl==.

* Require a municipality code (the party conditions already hold, see above)
keep if llkk!=.

* Indicator for each municipality (llkk) and election year (valar) combination
egen my = group(llkk valar)
* Numeric party code: G = 1, B = 2, S = 3, M = 4, all other parties missing
gen pos = cond(parti_initial == "G", 1, ///
	cond(parti_initial == "B", 2, ///
	cond(parti_initial == "S", 3, ///
	cond(parti_initial == "M", 4, .))))
* samp_ab = 1 for rows belonging to either of the two parties a and b
gen samp_gb = inlist(pos, 1, 2)
gen samp_bg = inlist(pos, 1, 2)
gen samp_gs = inlist(pos, 1, 3)
gen samp_sm = inlist(pos, 3, 4)
gen samp_ms = inlist(pos, 3, 4)
gen samp_bm = inlist(pos, 2, 4)
* dum_ab = 1 for rows of the first-named party a
gen dum_gb = (pos == 1)
gen dum_gs = (pos == 1)
gen dum_sm = (pos == 3)
gen dum_bm = (pos == 2)
gen dum_bg = (pos == 2)
gen dum_ms = (pos == 4)
drop _merge

* Dummies defining the different analysis samples
gen main = (nrinom_hl != .)
gen noimput = !(politician == 1 | lowinc == 1 | nrinom_hl == .)
gen elec = inlist(1, vald_ri, vald_lt, vald)
gen parl = (nom_ri == 1 & parti_initial != "")
gen age_28_32 = (nrinom_hl != . & inrange(age, 28, 32))
gen age_ret = (age >= 65)
gen listetta = (nrinom_hl == 1 & elec == 1)
* Copies of the standardized main indices that are non-missing only inside
* each alternative sample
foreach idx in std_y std_x {
	foreach s in noimput age_28_32 first_nom {
		gen `idx'_`s' = `idx' if `s' == 1
	}
}

*Define variables for the number of terms previously nominated and elected.
* The counts look back up to five rows ([_n-1] ... [_n-5]) and use a row only
* when it belongs to the same person. This is only meaningful if each person's
* rows are in chronological order, so the data are sorted explicitly here
* (stable keeps the current order among rows with the same p_id and valar).
* NOTE(review): nrinom is referenced as written in the original -- confirm it is
* a variable of its own and not an abbreviation of nrinom_hl.
sort p_id valar, stable
gen prev_nom=0
gen prev_elec=0
forvalues n=1/5{
	replace prev_nom=prev_nom+1 if  (nrinom[_n-`n']!=. |main[_n-`n']==1) & p_id[_n-`n']==p_id
	replace prev_elec=prev_elec+1 if  elec[_n-`n']==1  & p_id[_n-`n']==p_id
}

* Drop political data for years not used in the analysis: only rows with a
* non-missing year from 2006 onwards remain
keep if year >= 2006 & year != .
* One dummy per party initial
foreach letter in B C F G K M S V {
	gen pdum_`letter' = (parti_initial == "`letter'")
}

* Dummies for being nominated in all years and for the first time in 2006.
* no_obs sums main over a person's rows; first_nom_06 is the person-level
* maximum of first_nom taken over the rows with valar 2006 (missing when the
* person has no such row).
bysort p_id: egen no_obs = sum(main)
bysort p_id: egen first_nom_06 = max(cond(valar == 2006, first_nom, .))
gen nom_all = (no_obs == 3)
gen nom_all_fn06 = nom_all & first_nom_06 == 1


***Add information on birth region and define dummies for being an immigrant
* unmatched(master) keeps rows of the data in memory that have no match in the
* using file. For such rows the inv1g_* variables are missing, and the three
* dummies below then evaluate to 0 rather than to missing.
joinby p_id using "immigrants.dta"  ,  unmatched(master)
gen nordic=  inv1g_ejn ==0 &  inv1g_ejsv==1
gen european= inv1g_ejn ==1 &  inv1g_eje==0
gen non_european =  inv1g_eje==1

*Add data sets defined in set up of underlying files.
* Each join keeps unmatched rows of the data in memory; the _merge variable it
* creates is dropped before the next join. Files are joined either on p_id
* alone or on p_id and year.
drop _merge
joinby p_id using poor_parent, unmatched(master)
drop _merge
joinby p_id using "\\micro.intra\Projekt\P0624$\P0624_Gem\Political dynasties\Occupational following\sei parents 1982.dta", unmatched(master)
drop _merge
joinby p_id year using sei_own, unmatched(master)
drop _merge
joinby p_id year using oesch, unmatched(master)
drop _merge
joinby p_id year using sib_index, unmatched(master)
drop _merge
joinby p_id using parent_id, unmatched(master)
drop _merge
joinby p_id using parents_kit, unmatched(master)
drop _merge
joinby p_id year using index_nopbb, unmatched(master)
drop _merge
joinby p_id year using index_05pbb, unmatched(master)
drop _merge
joinby p_id  using index_avg, unmatched(master)
drop _merge
* NOTE(review): the _merge variable from this last join is not dropped in this file.
joinby p_id year using pc_index, unmatched(master)

*Define parental dummies
* count_ind sums nom over all rows of the same person. count_fath / count_moth
* sum nom over all rows that share the same LopNr_BioFar / LopNr_BioMor value,
* i.e. over the rows of everyone listing that father / mother.
* NOTE(review): confirm this is the intended measure of the parent's own nominations.
bysort p_id: egen count_ind=sum (nom)
bysort   LopNr_BioFar: egen count_fath= sum (nom)
bysort   LopNr_BioMor: egen count_moth= sum (nom)
* par_id is the mother's id when count_moth exceeds count_ind
gen par_id= LopNr_BioMor if count_moth>count_ind & count_ind!=. & count_moth!=.
* The father's id overwrites par_id only where par_id is already set (par_id!=.),
* so a row where only the father's count exceeds count_ind keeps a missing par_id.
* NOTE(review): check whether par_id==. was intended.
replace par_id= LopNr_BioFar if count_fath>count_ind & count_ind!=. & count_fath!=. & par_id!=. 
		
*Define parental indices and variables for closeness of parental indices
*
* For each index (y, x) the parental value is taken from the variable whose
* year suffix matches the person's birth cohort:
*   born 1962-1966 -> _80_,   born 1967-1971 -> _85_,   born 1972-1976 -> _90_
* (in each case the person is 14 to 18 years old in the suffix year). Persons
* born outside 1962-1976 get missing values.
gen birthyear = year-age
foreach var in y x{
	gen `var'_father_adol =  `var'_80_father if birthyear>=1962 &  birthyear<=1966
	replace `var'_father_adol =  `var'_85_father if birthyear>=1967 &  birthyear<=1971
	replace `var'_father_adol =  `var'_90_father if birthyear>=1972 &  birthyear<=1976
	gen `var'_mother_adol =  `var'_80_mother if birthyear>=1962 &  birthyear<=1966
	replace `var'_mother_adol =  `var'_85_mother if birthyear>=1967 &  birthyear<=1971
	replace `var'_mother_adol =  `var'_90_mother if birthyear>=1972 &  birthyear<=1976
	* Average of the available parental values
	egen `var'_parent_adol =  rowmean( `var'_mother_adol `var'_father_adol )
}

* Absolute distance between the parents' indices. For y, 0.5 is subtracted from
* the mother's value before the difference is taken.
* NOTE(review): presumably this offsets the sex component of y -- confirm.
gen y_pardiff = abs( (y_mother_adol-.5)- y_father_adol)
gen x_pardiff = abs( x_mother_adol-x_father_adol)
* Parents count as homogeneous when the distance is below a fixed cut-off.
* NOTE(review): the origin of the two cut-off values is not visible in this file.
gen x_parhom=x_pardiff<.469697 if x_pardiff!=.
gen y_parhom=y_pardiff<.3412247  if y_pardiff!=.

* Father's index interacted with the homogeneity dummy
foreach var in x y{
	gen `var'_fath_hom = `var'_father_adol*`var'_parhom
}
* Variables for parents in politics.
* party_parent is the father's party, or the mother's party when the father's
* is empty.
gen party_parent = party_father
replace party_parent = party_mother if party_parent == ""
* par_<P> = 1 when either parent's party is <P>; it is defined only for rows
* with a non-empty party_parent
foreach p in G B S M {
	gen par_`p' = (party_father == "`p'" | party_mother == "`p'") if party_parent != ""
}

* Define Kitschelt and Rehm's variables from the oesch codes 5 to 16.
* Authority dimension: text label and numeric position
gen author = "prof" if inlist(oesch, 5, 9, 13)
replace author = "ass_prof" if inlist(oesch, 6, 10, 14)
replace author = "skill_rout" if inlist(oesch, 7, 11, 15)
replace author = "unskill_rout" if inlist(oesch, 8, 12, 16)
gen author_pos = .75 if inlist(oesch, 5, 9, 13)
replace author_pos = .25 if inlist(oesch, 6, 10, 14)
replace author_pos = -.25 if inlist(oesch, 7, 11, 15)
replace author_pos = -.75 if inlist(oesch, 8, 12, 16)
* Task dimension: text label and numeric position
gen task = "org" if inlist(oesch, 9, 10, 11, 12)
replace task = "tech" if inlist(oesch, 5, 6, 7, 8)
replace task = "inter" if inlist(oesch, 13, 14, 15, 16)
gen task_pos = -.67 if inlist(oesch, 9, 10, 11, 12)
replace task_pos = 0 if inlist(oesch, 5, 6, 7, 8)
replace task_pos = .67 if inlist(oesch, 13, 14, 15, 16)
* Combined variable: the task label for positive authority positions and a
* single "unskill" group for negative ones
gen task_author = task if author_pos > 0 & author_pos != .
replace task_author = "unskill" if author_pos < 0
* Dummy sets; tabulate numbers the dummies in the sorted order of the labels,
* which the renames below depend on
tabulate task_author, gen(ta_)
rename ta_1 ta_skill_int
rename ta_2 ta_skill_org
rename ta_3 ta_skill_tech
rename ta_4 ta_unskill
tabulate task, gen(t_)
rename t_1 t_inter
rename t_2 t_org
rename t_3 t_tech
tabulate author, gen(a_)
rename a_1 a_associate
rename a_2 a_prof
rename a_3 a_skill
rename a_4 a_unskill

* Define the 8 category version of Oesch's schema: consecutive pairs of the 16
* oesch codes are collapsed into one category each
gen oesch_8cat = 1 if inlist(oesch, 1, 2)
replace oesch_8cat = 2 if inlist(oesch, 3, 4)
replace oesch_8cat = 3 if inlist(oesch, 5, 6)
replace oesch_8cat = 4 if inlist(oesch, 7, 8)
replace oesch_8cat = 5 if inlist(oesch, 9, 10)
replace oesch_8cat = 6 if inlist(oesch, 11, 12)
replace oesch_8cat = 7 if inlist(oesch, 13, 14)
replace oesch_8cat = 8 if inlist(oesch, 15, 16)
* One dummy per category
tabulate oesch_8cat, gen(oesch_8cat_)
*Define EGP schema
* modsei is recoded in place, and the order of the steps matters:
*  1. keep only the leading part of the code (integer part of modsei/10)
*  2. shift every value above 3 up by one, which leaves 4 unused
*  3. move the value that is now 9 into 4
*  4. merge the value that is now 8 into 7
replace modsei=int(modsei/10)
replace modsei=modsei+1 if modsei>3 & modsei!=.
replace modsei=4 if modsei==9
replace modsei=7 if modsei==8
* One dummy per resulting category
tab modsei, gen(egp_)
*Save data
save main_data_pol, replace
log close